#File Name: bootstrap_figures.R
#Data: SFGSII_mdr_exp_paper_forR.dta from main_text.do
#Purpose: Bootstrap confidence intervals and plot figures for expertise paper
#Output: Figures with bootstrapped confidence intervals 
#Date: 5/13/2017


setwd("G:/")
library(readstata13)
library(MASS)
data<-read.dta13("Data/mdr/expertise_paper/SFGSII_mdr_exp_paper_forR.dta",convert.factors = F)

#order the variables
data$inv_outside_exp<-as.ordered(data$inv_outside_exp)
data$inv_training<-as.ordered(data$inv_training)
data$leave_likelihood<-as.ordered(data$leave_likelihood)

##############################################
#define a function to collect the coefficients
##############################################

boot.model <- function(var,n,data.set){
  
  iv_data<-data.set[,var]
  
  op<-polr(iv_data~politicized_app+div_ip+approach_job+ses+as_exp+yrs_all_pos+contact_appointees+value_policy_influence+value_move_pvt+value_move_higher_fed,
           method="probit",Hess=T,data=data.set)
  d.boot<-op$model
  
  boot.coef<-matrix(NA,nrow=n,ncol=length(op$coefficients))
  boot.tau<-matrix(NA,nrow=n,ncol=length(op$zeta))
  
  for(i in 1:n){
    sample<-sample(seq(1, nrow(d.boot)), size=nrow(d.boot), replace=T)
    sample.boot<-d.boot[sample,]
    
    op<-polr(iv_data~politicized_app+div_ip+approach_job+ses+as_exp+yrs_all_pos+contact_appointees+value_policy_influence+value_move_pvt+value_move_higher_fed, 
             method="probit",Hess=T,data=sample.boot)
    
    boot.coef[i,]<-op$coefficients
    boot.tau[i,]<-op$zeta
  }
  
  out<-cbind(boot.coef,boot.tau)
  
  return(out)
  
}

##############################################
#function to get predicted probabilities for full set
##############################################

boot.pred.all<-function(pol,div,job,ses,as_exp,yrs,contact,policy,pvt,gov,n,model){
  
  holder<-matrix(NA,nrow=n,ncol=6)
  vars<-c(pol,div,job,ses,as_exp,yrs,contact,policy,pvt,gov)
  
  for(i in 1:n){
    holder[i,1]<-1-pnorm(model[i,15]-(sum(vars*model[i,1:10]))) #daily
    holder[i,2]<-pnorm(model[i,15]-(sum(vars*model[i,1:10])))-pnorm(model[i,14]-(sum(vars*model[i,1:10]))) #weekly
    holder[i,3]<-pnorm(model[i,14]-(sum(vars*model[i,1:10])))-pnorm(model[i,13]-(sum(vars*model[i,1:10]))) #monthly
    holder[i,4]<-pnorm(model[i,13]-(sum(vars*model[i,1:10])))-pnorm(model[i,12]-(sum(vars*model[i,1:10]))) #few times
    holder[i,5]<-pnorm(model[i,12]-(sum(vars*model[i,1:10])))-pnorm(model[i,11]-(sum(vars*model[i,1:10]))) #rarely
    holder[i,6]<-pnorm(model[i,11]-(sum(vars*model[i,1:10]))) #never
  }
  
  return(holder)
  
}

##############################################
#Get bootstrapped data for investing in outside expertise
##############################################

sub<-data[data$sample_inv_outside_exp==1,]
model.outside<-boot.model(var="inv_outside_exp",n=10000,data.set=sub)

out0<-boot.pred.all(pol=0,div=0.87,job=0,as_exp=0.1372,ses=0,yrs=17.32,contact=4,policy=4,gov=3,pvt=1,n=10000,model=model.outside)
out1<-boot.pred.all(pol=1,div=0.87,job=0,as_exp=0.1372,ses=0,yrs=17.32,contact=4,policy=4,gov=3,pvt=1,n=10000,model=model.outside)
out2<-boot.pred.all(pol=2,div=0.87,job=0,as_exp=0.1372,ses=0,yrs=17.32,contact=4,policy=4,gov=3,pvt=1,n=10000,model=model.outside)
out3<-boot.pred.all(pol=3,div=0.87,job=0,as_exp=0.1372,ses=0,yrs=17.32,contact=4,policy=4,gov=3,pvt=1,n=10000,model=model.outside)

out.prob<-matrix(nrow=4,ncol=3)
colnames(out.prob)<-c("mean","lb","ub")
rownames(out.prob)<-0:3

out.prob[1,1]<-mean(out0[,5]+out0[,6])
out.prob[1,2:3]<-quantile(out0[,5]+out0[,6],probs=c(0.025,0.975))

out.prob[2,1]<-mean(out1[,5]+out1[,6])
out.prob[2,2:3]<-quantile(out1[,5]+out1[,6],probs=c(0.025,0.975))

out.prob[3,1]<-mean(out2[,5]+out2[,6])
out.prob[3,2:3]<-quantile(out2[,5]+out2[,6],probs=c(0.025,0.975))

out.prob[4,1]<-mean(out3[,5]+out3[,6])
out.prob[4,2:3]<-quantile(out3[,5]+out3[,6],probs=c(0.025,0.975))

##############################################
#update the function for investing in training
##############################################

boot.pred.all<-function(pol,div,job,ses,as_exp,yrs,contact,policy,pvt,gov,n,model){
  
  holder<-matrix(NA,nrow=n,ncol=4)
  vars<-c(pol,div,job,ses,as_exp,yrs,contact,policy,pvt,gov)
  
  for(i in 1:n){
    holder[i,1]<-1-pnorm(model[i,13]-(sum(vars*model[i,1:10]))) #monthly
    holder[i,2]<-pnorm(model[i,13]-(sum(vars*model[i,1:10])))-pnorm(model[i,12]-(sum(vars*model[i,1:10]))) #few times
    holder[i,3]<-pnorm(model[i,12]-(sum(vars*model[i,1:10])))-pnorm(model[i,11]-(sum(vars*model[i,1:10]))) #rarely
    holder[i,4]<-pnorm(model[i,11]-(sum(vars*model[i,1:10]))) #never
  }
  
  return(holder)
  
}

##############################################
#Get bootstrapped data for investing in training
##############################################

sub<-data[data$sample_inv_training==1,]
model.train<-boot.model(var="inv_training",n=10000,data.set=sub)

train0<-boot.pred.all(pol=0,div=0.87,job=0,as_exp=0.1372,ses=0,yrs=17.32,contact=4,policy=4,gov=3,pvt=1,n=10000,model=model.train)
train1<-boot.pred.all(pol=1,div=0.87,job=0,as_exp=0.1372,ses=0,yrs=17.32,contact=4,policy=4,gov=3,pvt=1,n=10000,model=model.train)
train2<-boot.pred.all(pol=2,div=0.87,job=0,as_exp=0.1372,ses=0,yrs=17.32,contact=4,policy=4,gov=3,pvt=1,n=10000,model=model.train)
train3<-boot.pred.all(pol=3,div=0.87,job=0,as_exp=0.1372,ses=0,yrs=17.32,contact=4,policy=4,gov=3,pvt=1,n=10000,model=model.train)

train.prob<-matrix(nrow=4,ncol=3)
colnames(train.prob)<-c("mean","lb","ub")
rownames(train.prob)<-0:3

train.prob[1,1]<-mean(train0[,3]+train0[,4])
train.prob[1,2:3]<-quantile(train0[,3]+train0[,4],probs=c(0.023,0.973))

train.prob[2,1]<-mean(train1[,3]+train1[,4])
train.prob[2,2:3]<-quantile(train1[,3]+train1[,4],probs=c(0.023,0.973))

train.prob[3,1]<-mean(train2[,3]+train2[,4])
train.prob[3,2:3]<-quantile(train2[,3]+train2[,4],probs=c(0.023,0.973))

train.prob[4,1]<-mean(train3[,3]+train3[,4])
train.prob[4,2:3]<-quantile(train3[,3]+train3[,4],probs=c(0.023,0.973))

##############################################
#Redefine functions for exit
##############################################

##############################################
#define a function to collect the coefficients
##############################################

boot.model <- function(var,n,data.set){
  
  iv_data<-data.set[,var]
  
  op<-polr(iv_data~politicized_app+div_ip+approach_job+ses+as_exp+yrs_all_pos+contact_appointees+value_policy_influence+value_move_pvt+value_move_higher_fed+retire,
           method="probit",Hess=T,data=data.set)
  d.boot<-op$model
  
  boot.coef<-matrix(NA,nrow=n,ncol=length(op$coefficients))
  boot.tau<-matrix(NA,nrow=n,ncol=length(op$zeta))
  
  for(i in 1:n){
    sample<-sample(seq(1, nrow(d.boot)), size=nrow(d.boot), replace=T)
    sample.boot<-d.boot[sample,]
    
    op<-polr(iv_data~politicized_app+div_ip+approach_job+ses+as_exp+yrs_all_pos+contact_appointees+value_policy_influence+value_move_pvt+value_move_higher_fed+retire, 
             method="probit",Hess=T,data=sample.boot)
    
    boot.coef[i,]<-op$coefficients
    boot.tau[i,]<-op$zeta
  }
  
  out<-cbind(boot.coef,boot.tau)
  
  return(out)
  
}

##############################################
#function to get predicted probabilities for full set
##############################################

boot.pred.all<-function(pol,div,job,ses,as_exp,yrs,contact,policy,pvt,gov,retire,n,model){
  
  holder<-matrix(NA,nrow=n,ncol=4)
  vars<-c(pol,div,job,ses,as_exp,yrs,contact,policy,pvt,gov,retire)
  
  for(i in 1:n){
    holder[i,1]<-1-pnorm(model[i,14]-(sum(vars*model[i,1:11]))) #very likely
    holder[i,2]<-pnorm(model[i,14]-(sum(vars*model[i,1:11])))-pnorm(model[i,13]-(sum(vars*model[i,1:11]))) #likely
    holder[i,3]<-pnorm(model[i,13]-(sum(vars*model[i,1:11])))-pnorm(model[i,12]-(sum(vars*model[i,1:11]))) #unlikely
    holder[i,4]<-pnorm(model[i,12]-(sum(vars*model[i,1:11]))) #very unlikely
  }
  
  return(holder)
  
}

sub<-data[data$sample3==1,]
model.exit<-boot.model(var="leave_likelihood",n=10000,data.set=sub)

exit0<-boot.pred.all(pol=0,div=0.87,job=0,as_exp=0.1372,ses=0,yrs=17.32,contact=4,policy=4,gov=3,pvt=1,retire=0,n=10000,model=model.exit)
exit1<-boot.pred.all(pol=1,div=0.87,job=0,as_exp=0.1372,ses=0,yrs=17.32,contact=4,policy=4,gov=3,pvt=1,retire=0,n=10000,model=model.exit)
exit2<-boot.pred.all(pol=2,div=0.87,job=0,as_exp=0.1372,ses=0,yrs=17.32,contact=4,policy=4,gov=3,pvt=1,retire=0,n=10000,model=model.exit)
exit3<-boot.pred.all(pol=3,div=0.87,job=0,as_exp=0.1372,ses=0,yrs=17.32,contact=4,policy=4,gov=3,pvt=1,retire=0,n=10000,model=model.exit)

exit.prob<-matrix(nrow=4,ncol=3)
colnames(exit.prob)<-c("mean","lb","ub")
rownames(exit.prob)<-0:3

exit.prob[1,1]<-mean(exit0[,1]+exit0[,2])
exit.prob[1,2:3]<-quantile(exit0[,1]+exit0[,2],probs=c(0.025,0.975))

exit.prob[2,1]<-mean(exit1[,1]+exit1[,2])
exit.prob[2,2:3]<-quantile(exit1[,1]+exit1[,2],probs=c(0.025,0.975))

exit.prob[3,1]<-mean(exit2[,1]+exit2[,2])
exit.prob[3,2:3]<-quantile(exit2[,1]+exit2[,2],probs=c(0.025,0.975))

exit.prob[4,1]<-mean(exit3[,1]+exit3[,2])
exit.prob[4,2:3]<-quantile(exit3[,1]+exit3[,2],probs=c(0.025,0.975))

##############################################
#Redefine functions for politicization
##############################################

##############################################
#define a function to collect the coefficients
##############################################

boot.model <- function(var,n,data.set){
  
  iv_data<-data.set[,var]
  
  op<-polr(iv_data~div_ip+ses+yrs_all_pos+contact_appointees,
           method="probit",Hess=T,data=data.set)
  d.boot<-op$model
  
  boot.coef<-matrix(NA,nrow=n,ncol=length(op$coefficients))
  boot.tau<-matrix(NA,nrow=n,ncol=length(op$zeta))
  
  for(i in 1:n){
    sample<-sample(seq(1, nrow(d.boot)), size=nrow(d.boot), replace=T)
    sample.boot<-d.boot[sample,]
    
    op<-polr(iv_data~div_ip+ses+yrs_all_pos+contact_appointees, 
             method="probit",Hess=T,data=sample.boot)
    
    boot.coef[i,]<-op$coefficients
    boot.tau[i,]<-op$zeta
  }
  
  out<-cbind(boot.coef,boot.tau)
  
  return(out)
  
}

##############################################
#function to get predicted probabilities for full set
##############################################

boot.pred.all<-function(div,ses,yrs,contact,n,model){
  
  holder<-matrix(NA,nrow=n,ncol=5)
  vars<-c(div,ses,yrs,contact)
  
  for(i in 1:n){
    holder[i,1]<-1-pnorm(model[i,12]-(sum(vars*model[i,1:4]))) #4
    holder[i,2]<-pnorm(model[i,12]-(sum(vars*model[i,1:4])))-pnorm(model[i,11]-(sum(vars*model[i,1:4]))) #3
    holder[i,3]<-pnorm(model[i,11]-(sum(vars*model[i,1:4])))-pnorm(model[i,10]-(sum(vars*model[i,1:4]))) #2
    holder[i,4]<-pnorm(model[i,10]-(sum(vars*model[i,1:4])))-pnorm(model[i,9]-(sum(vars*model[i,1:4]))) #1
    holder[i,5]<-pnorm(model[i,9]-(sum(vars*model[i,1:4])))-pnorm(model[i,8]-(sum(vars*model[i,1:4]))) #0
    
  }
  
  return(holder)
  
}

data$politicized_app<-as.ordered(data$politicized_app)

sub<-data[data$sample1==1,]
model.pol<-boot.model(var="politicized_app",n=10000,data.set=sub)

pol0<-boot.pred.all(div=0,ses=0,yrs=17.32,contact=4,n=10000,model=model.pol)
pol1<-boot.pred.all(div=1,ses=0,yrs=17.32,contact=4,n=10000,model=model.pol)
pol2<-boot.pred.all(div=2,ses=0,yrs=17.32,contact=4,n=10000,model=model.pol)
pol3<-boot.pred.all(div=3,ses=0,yrs=17.32,contact=4,n=10000,model=model.pol)


pol.prob<-matrix(nrow=4,ncol=3)
colnames(pol.prob)<-c("mean","lb","ub")
rownames(pol.prob)<-0:3

pol.prob[1,1]<-mean(pol0[,1]+pol0[,2]+pol0[,3])
pol.prob[1,2:3]<-quantile(pol0[,1]+pol0[,2]+pol0[,3],probs=c(0.025,0.975))

pol.prob[2,1]<-mean(pol1[,1]+pol1[,2]+pol1[,3])
pol.prob[2,2:3]<-quantile(pol1[,1]+pol1[,2]+pol1[,3],probs=c(0.025,0.975))

pol.prob[3,1]<-mean(pol2[,1]+pol2[,2]+pol2[,3])
pol.prob[3,2:3]<-quantile(pol2[,1]+pol2[,2]+pol2[,3],probs=c(0.025,0.975))

pol.prob[4,1]<-mean(pol3[,1]+pol3[,2]+pol3[,3])
pol.prob[4,2:3]<-quantile(pol3[,1]+pol3[,2]+pol3[,3],probs=c(0.025,0.975))


##############################################
#save the bootstraps
##############################################

rm(data)

save.image(file="G:/Data/mdr/expertise_paper/boot_strap.RData")

##############################################
#Draw the figure
##############################################

xl=3
yl=0.55
cx=1.2
off=.25

#Draw the figure in .tif for JOP publication

tiff(file="C:/Users/mdr/Dropbox/Papers/expertise/Paper/richardson_politicization_figure1.tif",width=14,height=14, units="in",
     res=800)
par(mfrow=c(2,2),cex=1.5,las=1,mar=c(4,4,4,1),cex.lab=1.3, cex.axis=1.3)

plot(0:3,pol.prob[,1],type="b",pch=19,ylim=c(0,yl),xlim=c(0,xl),ylab="Predicted Probability",xlab="Preference Divergence",
     main="Perceive Agency is Politicized",xaxt="n")
segments(0:3,pol.prob[,2],0:3,pol.prob[,3])
axis(side=1, at=0:3)
text(0+off,pol.prob[1,1],labels=round(pol.prob[1,1],2),pos=3, cex=cx)
text(3-off,pol.prob[4,1],labels="0.30",pos=3, cex=cx) #round(pol.prob[4,1],2)
abline(h=0,lty=2)

plot(0:3,exit.prob[,1],type="b",pch=19,ylim=c(0,yl),xlim=c(0,xl),ylab="Predicted Probability",xlab="Politicization",
     main="Likely or Very Likely to Exit",xaxt="n")
segments(0:3,exit.prob[,2],0:3,exit.prob[,3])
axis(side=1, at=0:3)
text(0+off,exit.prob[1,1],labels=round(exit.prob[1,1],2),pos=3, cex=cx)
text(3-off,exit.prob[4,1],labels=round(exit.prob[4,1],2),pos=3, cex=cx)
abline(h=0,lty=2)

#plot(0,0,type="n",xlab="",ylab="",xaxt="n",yaxt="n",bty="n")

plot(0:3,out.prob[,1],type="b",pch=19,ylim=c(0,yl),xlim=c(0,xl),ylab="Predicted Probability",xlab="Politicization",
     main="Rarely or Never Discuss Policy with\nOutside Experts",xaxt="n")
segments(0:3,out.prob[,2],0:3,out.prob[,3])
axis(side=1, at=0:3)
text(0+off,out.prob[1,1],labels=round(out.prob[1,1],2),pos=3, cex=cx)
text(3-off,out.prob[4,1],labels=round(out.prob[4,1],2),pos=3, cex=cx)
abline(h=0,lty=2)

plot(0:3,train.prob[,1],type="b",pch=19,ylim=c(0,yl),xlim=c(0,xl),ylab="Predicted Probability",xlab="Politicization",
     main="Rarely or Never Attend Training\nand Seminars",xaxt="n")
segments(0:3,train.prob[,2],0:3,train.prob[,3])
axis(side=1, at=0:3)
text(0+off,train.prob[1,1],labels=round(train.prob[1,1],2),pos=3, cex=cx)
text(3-off,train.prob[4,1],labels=round(train.prob[4,1],2),pos=3, cex=cx)
abline(h=0,lty=2)

dev.off()

#Draw the figure in .png for JOP publication

png(file="C:/Users/mdr/Dropbox/Papers/expertise/Paper/richardson_politicization_figure1.png",
    width=14,height=14, units="in",res=1200)
par(mfrow=c(2,2),cex=1.5,las=1,mar=c(4,4,4,1),cex.lab=1.3, cex.axis=1.3)

plot(0:3,pol.prob[,1],type="b",pch=19,ylim=c(0,yl),xlim=c(0,xl),ylab="Predicted Probability",xlab="Preference Divergence",
     main="Perceive Agency is Politicized",xaxt="n")
segments(0:3,pol.prob[,2],0:3,pol.prob[,3])
axis(side=1, at=0:3)
text(0+off,pol.prob[1,1],labels=round(pol.prob[1,1],2),pos=3, cex=cx)
text(3-off,pol.prob[4,1],labels="0.30",pos=3, cex=cx) #round(pol.prob[4,1],2)
abline(h=0,lty=2)

plot(0:3,exit.prob[,1],type="b",pch=19,ylim=c(0,yl),xlim=c(0,xl),ylab="Predicted Probability",xlab="Politicization",
     main="Likely or Very Likely to Exit",xaxt="n")
segments(0:3,exit.prob[,2],0:3,exit.prob[,3])
axis(side=1, at=0:3)
text(0+off,exit.prob[1,1],labels=round(exit.prob[1,1],2),pos=3, cex=cx)
text(3-off,exit.prob[4,1],labels=round(exit.prob[4,1],2),pos=3, cex=cx)
abline(h=0,lty=2)

#plot(0,0,type="n",xlab="",ylab="",xaxt="n",yaxt="n",bty="n")

plot(0:3,out.prob[,1],type="b",pch=19,ylim=c(0,yl),xlim=c(0,xl),ylab="Predicted Probability",xlab="Politicization",
     main="Rarely or Never Discuss Policy with\nOutside Experts",xaxt="n")
segments(0:3,out.prob[,2],0:3,out.prob[,3])
axis(side=1, at=0:3)
text(0+off,out.prob[1,1],labels=round(out.prob[1,1],2),pos=3, cex=cx)
text(3-off,out.prob[4,1],labels=round(out.prob[4,1],2),pos=3, cex=cx)
abline(h=0,lty=2)

plot(0:3,train.prob[,1],type="b",pch=19,ylim=c(0,yl),xlim=c(0,xl),ylab="Predicted Probability",xlab="Politicization",
     main="Rarely or Never Attend Training\nand Seminars",xaxt="n")
segments(0:3,train.prob[,2],0:3,train.prob[,3])
axis(side=1, at=0:3)
text(0+off,train.prob[1,1],labels=round(train.prob[1,1],2),pos=3, cex=cx)
text(3-off,train.prob[4,1],labels=round(train.prob[4,1],2),pos=3, cex=cx)
abline(h=0,lty=2)

dev.off()

##############################################
#Get marginal effects for text
##############################################

#politicization
p0<-(pol1[,1]+pol1[,2]+pol1[,3])-(pol0[,1]+pol0[,2]+pol0[,3])
mean(p0)
quantile(p0,probs=c(0.025,0.975))

p1<-(pol2[,1]+pol2[,2]+pol2[,3])-(pol1[,1]+pol1[,2]+pol1[,3])
mean(p1)
quantile(p1,probs=c(0.025,0.975))

p2<-(pol3[,1]+pol3[,2]+pol3[,3])-(pol2[,1]+pol2[,2]+pol2[,3])
mean(p2)
quantile(p2,probs=c(0.025,0.975))

sum(mean(p0),mean(p1),mean(p2))

#exit
e0<-(exit1[,1]+exit1[,2])-(exit0[,1]+exit0[,2])
mean(e0)
quantile(e0,probs=c(0.025,0.04,0.975))

e1<-(exit2[,1]+exit2[,2])-(exit1[,1]+exit1[,2])
mean(e1)
quantile(e1,probs=c(0.025,0.04,0.975))

e2<-(exit3[,1]+exit3[,2])-(exit2[,1]+exit2[,2])
mean(e2)
quantile(e2,probs=c(0.025,0.04,0.975))

sum(mean(e0),mean(e1),mean(e2))

#outside
o0<-(out1[,5]+out1[,6])-(out0[,5]+out0[,6])
mean(o0)
quantile(o0,probs=c(0.025,0.975))

o1<-(out2[,5]+out2[,6])-(out1[,5]+out1[,6])
mean(o1)
quantile(o1,probs=c(0.025,0.975))

o2<-(out3[,5]+out3[,6])-(out2[,5]+out2[,6])
mean(o2)
quantile(o2,probs=c(0.025,0.975))

sum(mean(o0),mean(o1),mean(o2))

#training
z0<-(train1[,3]+train1[,4])-(train0[,3]+train0[,4])
mean(z0)
quantile(z0,probs=c(0.025,0.975))

z1<-(train2[,3]+train2[,4])-(train1[,3]+train1[,4])
mean(z1)
quantile(z1,probs=c(0.025,0.975))

z2<-(train3[,3]+train3[,4])-(train2[,3]+train2[,4])
mean(z2)
quantile(z2,probs=c(0.025,0.975))

sum(mean(z0),mean(z1),mean(z2))
